In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import collections
import os
import re
import warnings

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from pandas_profiling import ProfileReport
from plotly.offline import iplot, init_notebook_mode
from plotly.subplots import make_subplots
from wordcloud import WordCloud, STOPWORDS
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
# Fetches the complete NLTK data collection ('all'), which produces the long log below.
# NOTE(review): the imports in this notebook only reference the stopwords corpus and
# word_tokenize - presumably a much smaller download would do; confirm against later cells.
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# Colab alternative: path = '/content/drive/MyDrive/Files/'

# Data folder. Defaults to the original local directory; set the OTT_DATA_DIR
# environment variable to run the notebook on another machine without editing
# this cell.
path = os.environ.get('OTT_DATA_DIR', 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\')
# `path` is concatenated directly with file names, so it must end with a separator.
if not path.endswith(('/', '\\')):
    path += os.sep

# Raw TV-show catalogue, one row per title.
df_tvshows = pd.read_csv(path + 'otttvshows.csv')

df_tvshows.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Snowpiercer 2013 18+ 6.9 94% NaN Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States English Set seven years after the world has become a f... 60.0 tv series 3.0 1 0 0 0 1
1 2 Philadelphia 1993 13+ 8.8 80% NaN Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States English The gang, 5 raging alcoholic, narcissists run ... 22.0 tv series 18.0 1 0 0 0 1
2 3 Roma 2018 18+ 8.7 93% NaN Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States English In this British historical drama, the turbulen... 52.0 tv series 2.0 1 0 0 0 1
3 4 Amy 2015 18+ 7.0 87% NaN Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States English A family drama focused on three generations of... 60.0 tv series 6.0 1 0 1 1 1
4 5 The Young Offenders 2016 NaN 8.0 100% NaN Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland English NaN 30.0 tv series 3.0 1 0 0 0 1
In [6]:
# profile = ProfileReport(df_tvshows)
# profile
In [7]:
def data_investigate(df):
    """Print a quick structural audit of ``df`` and draw its missing-value map.

    Sections are written to stdout in this order, separated by a rule of 50
    asterisks: row and column counts, column names, column dtypes, null
    counts (only for columns with at least one null), null percentage for
    every column, and finally a seaborn heat map of ``df.isnull()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is not modified.

    Returns
    -------
    None
    """
    rule = '**' * 25

    # Shape
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print(rule)

    # Schema
    print('Colums Names : \n', df.columns)
    print(rule)
    print('Datatype of Columns : \n', df.dtypes)
    print(rule)

    # Missing values: the null mask is computed once and reused for the
    # counts, the percentages and the plot.
    print('Missing Values : ')
    null_mask = df.isnull()
    null_counts = null_mask.sum()
    print(null_counts[null_counts > 0])
    print(rule)
    print('Missing vaules %age wise :\n')
    print(100 * (null_counts / len(df.index)))
    print(rule)

    # Null map: one row of the plot per row of the frame, row labels hidden.
    print('Pictorial Representation : ')
    plt.figure(figsize=(10, 10))
    sns.heatmap(null_mask, yticklabels=False, cbar=False)
    plt.show()
In [8]:
# Audit the frame as loaded (shape, dtypes, missing values) before any cleaning is applied.
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                1954
IMDb                556
Rotten Tomatoes    4194
Directors          5158
Cast                486
Genres              323
Country             549
Language            638
Plotline           2493
Runtime            1410
Seasons             679
dtype: int64
**************************************************
Missing vaules %age wise :

ID                  0.000000
Title               0.000000
Year                0.000000
Age                35.972018
IMDb               10.235641
Rotten Tomatoes    77.209131
Directors          94.955817
Cast                8.946981
Genres              5.946244
Country            10.106775
Language           11.745214
Plotline           45.894698
Runtime            25.957290
Kind                0.000000
Seasons            12.500000
Netflix             0.000000
Hulu                0.000000
Prime Video         0.000000
Disney+             0.000000
Type                0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# ---- Cleaning of df_tvshows ----
# Missing values are replaced with explicit sentinels ('NR' for Age, "NA" for
# every other column) instead of being imputed (mean / median) or dropped, so
# no row is lost. The price is that IMDb, Rotten Tomatoes, Runtime and Seasons
# become object columns holding a mix of numbers and the string "NA".
# ID, Kind and Type are left untouched.

# Age: unrated Disney+ titles default to '13'. The parentheses are required:
# `&` binds tighter than `==`, so without them the comparison is applied to the
# combined mask rather than to the Disney+ flag.
df_tvshows.loc[df_tvshows['Age'].isnull() & (df_tvshows['Disney+'] == 1), 'Age'] = '13'

# Remaining unrated titles get 'NR'; the rating labels are reduced to their
# minimum age as a string. The column is reassigned rather than mutated through
# `df[col].replace(..., inplace=True)`: that form edits a temporary Series and
# stops updating the frame once pandas Copy-on-Write is active.
df_tvshows['Age'] = df_tvshows['Age'].fillna('NR').replace(
    {'all': '0', '7+': '7', '13+': '13', '16+': '16', '18+': '18'}
)

# Rotten Tomatoes: strip the percent sign and convert the scores that exist.
# The assignment aligns on the index, so rows without a score stay NaN here and
# receive the "NA" sentinel in the loop below.
df_tvshows['Rotten Tomatoes'] = (
    df_tvshows['Rotten Tomatoes'].dropna().str.replace('%', '').astype(int)
)

# Every other column with gaps gets the same "NA" sentinel.
for column in ['IMDb', 'Rotten Tomatoes', 'Directors', 'Cast', 'Genres',
               'Country', 'Language', 'Plotline', 'Runtime', 'Seasons']:
    df_tvshows[column] = df_tvshows[column].fillna("NA")

# Service Provider: name of the platform column holding the row maximum.
# idxmax returns the first column on ties, so a title available on several
# platforms is labelled with the first one in this list order.
df_tvshows['Service Provider'] = df_tvshows[['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)

# Removing Duplicate and Missing Entries
df_tvshows.dropna(how = 'any', inplace = True)
df_tvshows.drop_duplicates(inplace = True)
In [10]:
# Re-run the same audit on the frame after the cleaning cell.
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  21
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Seasons             object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Seasons             0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
# Preview the first rows of the cleaned frame.
df_tvshows.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Snowpiercer 2013 18 6.9 94 NA Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States ... Set seven years after the world has become a f... 60 tv series 3 1 0 0 0 1 Netflix
1 2 Philadelphia 1993 13 8.8 80 NA Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States ... The gang, 5 raging alcoholic, narcissists run ... 22 tv series 18 1 0 0 0 1 Netflix
2 3 Roma 2018 18 8.7 93 NA Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States ... In this British historical drama, the turbulen... 52 tv series 2 1 0 0 0 1 Netflix
3 4 Amy 2015 18 7 87 NA Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States ... A family drama focused on three generations of... 60 tv series 6 1 0 1 1 1 Netflix
4 5 The Young Offenders 2016 NR 8 100 NA Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland ... NA 30 tv series 3 1 0 0 0 1 Netflix

5 rows × 21 columns

In [12]:
# Summary statistics. describe() reports numeric columns only by default, so
# IMDb, Runtime and Seasons (object dtype after the "NA" fill) do not appear.
df_tvshows.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.0
mean 2716.500000 2010.668446 0.341311 0.293999 0.403351 0.033689 1.0
std 1568.227662 11.726176 0.474193 0.455633 0.490615 0.180445 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 1.0
25% 1358.750000 2009.000000 0.000000 0.000000 0.000000 0.000000 1.0
50% 2716.500000 2014.000000 0.000000 0.000000 0.000000 0.000000 1.0
75% 4074.250000 2017.000000 1.000000 1.000000 1.000000 0.000000 1.0
max 5432.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 1.0
In [13]:
# Pairwise correlation of the numeric columns. They are selected explicitly
# because the frame also holds object columns, which recent pandas versions no
# longer drop silently inside corr(). Type is constant, hence its NaN entries.
df_tvshows.select_dtypes(include = 'number').corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.031346 -0.646330 0.034293 0.441264 0.195409 NaN
Year -0.031346 1.000000 0.222316 -0.065807 -0.198675 -0.022741 NaN
Netflix -0.646330 0.222316 1.000000 -0.366515 -0.515086 -0.119344 NaN
Hulu 0.034293 -0.065807 -0.366515 1.000000 -0.377374 -0.075701 NaN
Prime Video 0.441264 -0.198675 -0.515086 -0.377374 1.000000 -0.151442 NaN
Disney+ 0.195409 -0.022741 -0.119344 -0.075701 -0.151442 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_tvshows.sort_values('Year', ascending = True)
# df_tvshows.sort_values('IMDb', ascending = False)
In [15]:
# df_tvshows.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_otttvshows.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_tvshows = pd.read_csv(path + 'updated_otttvshows.csv')
 
# udf_tvshows
In [16]:
# df_netflix_tvshows = df_tvshows.loc[(df_tvshows['Netflix'] > 0)]
# df_hulu_tvshows = df_tvshows.loc[(df_tvshows['Hulu'] > 0)]
# df_prime_video_tvshows = df_tvshows.loc[(df_tvshows['Prime Video'] > 0)]
# df_disney_tvshows = df_tvshows.loc[(df_tvshows['Disney+'] > 0)]
In [17]:
# Shows exclusive to a single platform: that platform's flag is 1 and the other three flags are 0
df_netflix_only_tvshows = df_tvshows[
    df_tvshows['Netflix'].eq(1)
    & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(0)
    & df_tvshows['Disney+'].eq(0)
]
df_hulu_only_tvshows = df_tvshows[
    df_tvshows['Netflix'].eq(0)
    & df_tvshows['Hulu'].eq(1)
    & df_tvshows['Prime Video'].eq(0)
    & df_tvshows['Disney+'].eq(0)
]
df_prime_video_only_tvshows = df_tvshows[
    df_tvshows['Netflix'].eq(0)
    & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(1)
    & df_tvshows['Disney+'].eq(0)
]
df_disney_only_tvshows = df_tvshows[
    df_tvshows['Netflix'].eq(0)
    & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(0)
    & df_tvshows['Disney+'].eq(1)
]
In [18]:
# Independent copy for the release-year analysis, so later edits to it leave df_tvshows untouched
df_tvshows_years = df_tvshows.copy()
In [19]:
# Keep only rows whose Year is not the literal string "NA", then store Year as integers
df_tvshows_years = df_tvshows_years[df_tvshows_years['Year'] != "NA"].copy()
df_tvshows_years['Year'] = df_tvshows_years['Year'].astype(int)
In [20]:
# One frame per streaming platform: every show flagged as available there (exclusive or not)
netflix_years_tvshows = df_tvshows_years[df_tvshows_years['Netflix'].eq(1)]
hulu_years_tvshows = df_tvshows_years[df_tvshows_years['Hulu'].eq(1)]
prime_video_years_tvshows = df_tvshows_years[df_tvshows_years['Prime Video'].eq(1)]
disney_years_tvshows = df_tvshows_years[df_tvshows_years['Disney+'].eq(1)]
In [21]:
# Separate copy of the year-cleaned frame; the derived 'Year Group' column is added to this copy later
df_tvshows_years_group = df_tvshows_years.copy()
In [22]:
# Correlation heat map of the numeric columns of df_tvshows_years.
# Numeric columns are selected explicitly so corr() also works on pandas >= 2.0,
# which no longer drops text columns automatically.
corr = df_tvshows_years.select_dtypes(include = 'number').corr()
# Create exactly one figure/axes pair (a separate plt.figure() call would leave an empty figure behind)
fig, ax = plt.subplots(figsize = (10, 8))
# Draw onto that axes; annot/fmt print each coefficient with two decimals inside its cell
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f", ax = ax)
# seaborn already labels the ticks from corr's index/columns and centres them on the cells,
# so no manual xticks/yticks override is applied
plt.show()
<Figure size 720x720 with 0 Axes>
In [23]:
# All shows ordered from the most recent release year to the oldest, re-indexed from 0
df_years_high_tvshows = (
    df_tvshows_years
    .sort_values(by = 'Year', ascending = False)
    .reset_index(drop = True)
)

print('\nTV Shows with Highest Ever Year  are : \n')
df_years_high_tvshows.head(5)
TV Shows with Highest Ever Year  are : 

Out[23]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 894 Gentefied 2020 18 7.4 91 NA Joaquín Cosio,Joseph Julian Soria,Karrie Marti... Comedy United States ... NA NA tv series 2 1 0 0 0 1 Netflix
1 3004 BOFURI: I Don’t Want to Get Hurt, so I’ll Max ... 2020 16 7.5 NA NA Jad Saxton,Megan Shipman,Anthony Bowling,Tia L... Animation,Action,Adventure,Comedy,Fantasy,Sci-Fi Japan ... NA 23 tv series 1 0 1 0 0 1 Hulu
2 940 The Pharmacist 2020 18 7.7 89 NA NA Documentary,Crime United States ... Seventeen year-old Kim is the pride and joy of... 217 tv series 1 1 0 0 0 1 Netflix
3 3038 The Bachelor Presents: Listen to Your Heart 2020 16 4.9 NA NA Chris Harrison,Jamie Gabrielle,Matt Ranaudo,Br... Drama,Game-Show,Music,Reality-TV,Romance United States ... NA 120 tv series 1 0 1 0 0 1 Hulu
4 1425 Almost Happy 2020 NR 6.8 NA NA Sebastián Wainraich,Natalie Pérez,Santiago Kor... Comedy Argentina ... NA NA tv series 1 1 0 0 0 1 Netflix

5 rows × 21 columns

In [24]:
# Bar chart of the first 15 rows of the newest-first frame (all platforms).
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = df_years_high_tvshows['Title'][:15],
             x = df_years_high_tvshows['Year'][:15], 
             color = df_years_high_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Highest Release Year : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [25]:
# All shows ordered from the oldest release year to the most recent, re-indexed from 0
df_years_low_tvshows = (
    df_tvshows_years
    .sort_values(by = 'Year', ascending = True)
    .reset_index(drop = True)
)

print('\nTV Shows with Lowest Ever Year  are : \n')
df_years_low_tvshows.head(5)
TV Shows with Lowest Ever Year  are : 

Out[25]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 4665 Gods & Monsters with Tony Robinson 1901 NR 7.3 NA NA Tony Robinson,Little Woodham Villagers,Peter M... History United Kingdom ... NA 60 tv series 1 0 0 1 0 1 Prime Video
1 4684 History of Westinghouse 1904 NR NA NA NA NA NA NA ... NA NA tv series NA 0 0 1 0 1 Prime Video
2 2204 Born To Explore 1914 0 7.5 NA NA Richard Wiese,Kenneth Lacovara,Belle Aykroyd,D... Adventure United States ... NA 30 tv series 8 1 0 0 0 1 Netflix
3 126 Nosferatu 1922 NR 6.7 97 NA Ashleigh Cummings,Ólafur Darri Ólafsson,Jahkar... Drama,Fantasy,Horror,Mystery United States ... A young Victoria "Vic" McQueen discovers she h... 60 tv series 2 0 0 1 0 1 Prime Video
4 139 College 1927 NR 6.8 12 NA Tom Hanks,Peter Scolari,Donna Dixon,Holland Ta... Comedy United States ... After her husband's death, Hana lives on alone... 30 tv series 2 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [26]:
# Bar chart of the first 15 rows of the oldest-first frame (all platforms).
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = df_years_low_tvshows['Title'][:15],
             x = df_years_low_tvshows['Year'][:15], 
             color = df_years_low_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Lowest Release Year : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [27]:
# Text summary: number of distinct release years, the distinct years in descending order,
# and the first title of the newest-first / oldest-first frames with the overall max / min Year
print(f'''
      Total '{df_tvshows_years['Year'].unique().shape[0]}' unique Year s were Given, They were Like this,\n
      
{df_tvshows_years.sort_values(by = 'Year', ascending = False)['Year'].unique()}\n
 
      The Highest Ever Year Ever Any TV Show Got is '{df_years_high_tvshows['Title'][0]}' : '{df_years_high_tvshows['Year'].max()}'\n
 
      The Lowest Ever Year Ever Any TV Show Got is '{df_years_low_tvshows['Title'][0]}' : '{df_years_low_tvshows['Year'].min()}'\n
      ''')
      Total '89' unique Year s were Given, They were Like this,

      
[2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007
 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993
 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979
 1978 1977 1976 1975 1974 1973 1972 1971 1970 1969 1968 1967 1966 1965
 1964 1963 1962 1961 1960 1959 1958 1957 1956 1955 1954 1953 1952 1951
 1950 1949 1948 1947 1946 1945 1944 1943 1942 1940 1938 1937 1936 1932
 1927 1922 1914 1904 1901]

 
      The Highest Ever Year Ever Any TV Show Got is 'Gentefied' : '2020'

 
      The Lowest Ever Year Ever Any TV Show Got is 'Gods & Monsters with Tony Robinson' : '1901'

      
In [28]:
# Netflix subset of the newest-first and oldest-first frames, each re-indexed from 0
netflix_years_high_tvshows = df_years_high_tvshows[df_years_high_tvshows['Netflix'].eq(1)].reset_index(drop = True)
netflix_years_low_tvshows = df_years_low_tvshows[df_years_low_tvshows['Netflix'].eq(1)].reset_index(drop = True)

netflix_years_high_tvshows.head(5)
Out[28]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 894 Gentefied 2020 18 7.4 91 NA Joaquín Cosio,Joseph Julian Soria,Karrie Marti... Comedy United States ... NA NA tv series 2 1 0 0 0 1 Netflix
1 940 The Pharmacist 2020 18 7.7 89 NA NA Documentary,Crime United States ... Seventeen year-old Kim is the pride and joy of... 217 tv series 1 1 0 0 0 1 Netflix
2 1425 Almost Happy 2020 NR 6.8 NA NA Sebastián Wainraich,Natalie Pérez,Santiago Kor... Comedy Argentina ... NA NA tv series 1 1 0 0 0 1 Netflix
3 927 #blackAF 2020 18 6.8 46 NA Rashida Jones,Kenya Barris,Iman Benson,Genneya... Comedy United States ... Jim Lake Jr. is an ordinary kid with a busy Mo... 36 tv series 2 1 0 0 0 1 Netflix
4 1826 Ainori Love Wagon: African Journey 2020 NR 7.2 NA NA Becky,Ryô Katô,Karina Maruyama Reality-TV Japan ... NA NA tv series NA 1 0 0 0 1 Netflix

5 rows × 21 columns

In [29]:
# Bar chart of the first 15 rows of the newest-first Netflix frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = netflix_years_high_tvshows['Title'][:15],
             x = netflix_years_high_tvshows['Year'][:15], 
             color = netflix_years_high_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Highest Release Year : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
# Bar chart of the first 15 rows of the oldest-first Netflix frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = netflix_years_low_tvshows['Title'][:15],
             x = netflix_years_low_tvshows['Year'][:15], 
             color = netflix_years_low_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Lowest Release Year : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [31]:
# Hulu subset of the newest-first and oldest-first frames, each re-indexed from 0
hulu_years_high_tvshows = df_years_high_tvshows[df_years_high_tvshows['Hulu'].eq(1)].reset_index(drop = True)
hulu_years_low_tvshows = df_years_low_tvshows[df_years_low_tvshows['Hulu'].eq(1)].reset_index(drop = True)

hulu_years_high_tvshows.head(5)
Out[31]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 3004 BOFURI: I Don’t Want to Get Hurt, so I’ll Max ... 2020 16 7.5 NA NA Jad Saxton,Megan Shipman,Anthony Bowling,Tia L... Animation,Action,Adventure,Comedy,Fantasy,Sci-Fi Japan ... NA 23 tv series 1 0 1 0 0 1 Hulu
1 3038 The Bachelor Presents: Listen to Your Heart 2020 16 4.9 NA NA Chris Harrison,Jamie Gabrielle,Matt Ranaudo,Br... Drama,Game-Show,Music,Reality-TV,Romance United States ... NA 120 tv series 1 0 1 0 0 1 Hulu
2 3015 ID: INVADED 2020 18 7.6 NA NA Kenjirô Tsuda,Sarah Emi Bridcutt,Yoshimasa Hos... Animation,Crime,Drama,Mystery,Sci-Fi,Thriller Japan ... We follow a band of American soldiers as they ... 24 tv series 1 0 1 0 0 1 Hulu
3 2999 Council of Dads 2020 16 6.7 50 NA Sarah Wayne Callies,Clive Standen,J. August Ri... Drama United States ... NA 44 tv series 1 0 1 0 0 1 Hulu
4 2994 Toilet-Bound Hanako-kun 2020 16 7.4 NA NA Justin Briner,Megumi Ogata,Tyson Rinehart,Tia ... Animation,Comedy,Fantasy Japan ... An Americanized version of the original Japane... 24 tv series 1 0 1 0 0 1 Hulu

5 rows × 21 columns

In [32]:
# Bar chart of the first 15 rows of the newest-first Hulu frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = hulu_years_high_tvshows['Title'][:15],
             x = hulu_years_high_tvshows['Year'][:15], 
             color = hulu_years_high_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Highest Release Year : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
# Bar chart of the first 15 rows of the oldest-first Hulu frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = hulu_years_low_tvshows['Title'][:15],
             x = hulu_years_low_tvshows['Year'][:15], 
             color = hulu_years_low_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Lowest Release Year : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [34]:
# Prime Video subset of the newest-first and oldest-first frames, each re-indexed from 0
prime_video_years_high_tvshows = df_years_high_tvshows[df_years_high_tvshows['Prime Video'].eq(1)].reset_index(drop = True)
prime_video_years_low_tvshows = df_years_low_tvshows[df_years_low_tvshows['Prime Video'].eq(1)].reset_index(drop = True)

prime_video_years_high_tvshows.head(5)
Out[34]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 3780 Tales from the Loop 2020 18 7.5 85 NA Daniel Zolghadri,Paul Schneider,Rebecca Hall,R... Drama,Sci-Fi United States ... God has abandoned Heaven. It's 1985: the Reaga... 50 tv series 1 0 0 1 0 1 Prime Video
1 204 Evolution of Evil 2020 NR 6.5 NA NA Alisdair Simpson,Mohamed Atta,Luigi Boccanfuso... Documentary,Biography,History Germany,United Kingdom ... A millionaire is found dead of heart failure h... 50 tv series 1 0 0 1 0 1 Prime Video
2 4461 Jessy & Nessy 2020 0 8 NA NA Jamie Buchanan,Alexa Bauer,Shai Matheson,Naomi... Animation Ireland ... Franny's Feet is about a 5 year old girl who v... NA tv series 1 0 0 1 0 1 Prime Video
3 3735 Upload 2020 18 8 88 NA Robbie Amell,Andy Allo,Zainab Johnson,Kevin Bi... Comedy,Mystery,Sci-Fi United States ... When CIA analyst Jack Ryan stumbles upon a sus... 296 tv series 2 0 0 1 0 1 Prime Video
4 3790 Hunters 2020 18 7.2 64 NA Logan Lerman,Jerrika Hinton,Lena Olin,Saul Rub... Crime,Drama,Mystery United States ... NA 60 tv series 2 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [35]:
# Bar chart of the first 15 rows of the newest-first Prime Video frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = prime_video_years_high_tvshows['Title'][:15],
             x = prime_video_years_high_tvshows['Year'][:15], 
             color = prime_video_years_high_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Highest Release Year : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
# Bar chart of the first 15 rows of the oldest-first Prime Video frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = prime_video_years_low_tvshows['Title'][:15],
             x = prime_video_years_low_tvshows['Year'][:15], 
             color = prime_video_years_low_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Lowest Release Year : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [37]:
# Disney+ subset of the newest-first and oldest-first frames, each re-indexed from 0
disney_years_high_tvshows = df_years_high_tvshows[df_years_high_tvshows['Disney+'].eq(1)].reset_index(drop = True)
disney_years_low_tvshows = df_years_low_tvshows[df_years_low_tvshows['Disney+'].eq(1)].reset_index(drop = True)

disney_years_high_tvshows.head(5)
Out[37]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 5395 It's A Dog's Life 2020 0 8.2 NA NA Bill Farmer,Esther Abshier,Linda Castaneda,Eba... Documentary United States ... NA 22 tv series 1 0 0 0 1 1 Disney+
1 5342 Diary of a Future President 2020 7 5.9 100 NA Sanai Victoria,Nathan Arenas,Tess Romero,Selen... Comedy,Drama,Family United States ... Executive producer Kristen Bell, who also appe... 30 tv series 2 0 0 0 1 1 Disney+
2 5333 Prop Culture 2020 7 8.2 NA NA Dan Lanigan,Don Bies,Andrew Adamson,Erin Andre... Documentary United States ... Peter Parker has been Spider-Man for eight yea... 35 tv series 1 0 0 0 1 1 Disney+
3 5318 Disney Gallery / Star Wars: The Mandalorian 2020 7 8.5 100 Josiah Swanson Josiah Swanson Talk-Show NA ... NA NA tv series NA 0 0 0 1 1 Disney+
4 487 Stargirl 2020 7 7.3 70 NA Brec Bassinger,Yvette Monreal,Anjelika Washing... Action,Adventure,Crime,Drama,Fantasy,Sci-Fi United States ... NA 566 tv series 2 0 0 0 1 1 Disney+

5 rows × 21 columns

In [38]:
# Bar chart of the first 15 rows of the newest-first Disney+ frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = disney_years_high_tvshows['Title'][:15],
             x = disney_years_high_tvshows['Year'][:15], 
             color = disney_years_high_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Highest Release Year : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
# Bar chart of the first 15 rows of the oldest-first Disney+ frame.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = disney_years_low_tvshows['Title'][:15],
             x = disney_years_low_tvshows['Year'][:15], 
             color = disney_years_low_tvshows['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Release Year'},
             title  = 'TV Shows with Lowest Release Year : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [40]:
# For all platforms combined and for each platform: the Title in row 0 of the
# newest-first / oldest-first frame, paired with that frame's max / min Year
print(f'''
      The TV Show with Highest Year  Ever Got is '{df_years_high_tvshows['Title'][0]}' : '{df_years_high_tvshows['Year'].max()}'\n
      The TV Show with Lowest Year  Ever Got is '{df_years_low_tvshows['Title'][0]}' : '{df_years_low_tvshows['Year'].min()}'\n
      
      The TV Show with Highest Year  on 'Netflix' is '{netflix_years_high_tvshows['Title'][0]}' : '{netflix_years_high_tvshows['Year'].max()}'\n
      The TV Show with Lowest Year  on 'Netflix' is '{netflix_years_low_tvshows['Title'][0]}' : '{netflix_years_low_tvshows['Year'].min()}'\n
      
      The TV Show with Highest Year  on 'Hulu' is '{hulu_years_high_tvshows['Title'][0]}' : '{hulu_years_high_tvshows['Year'].max()}'\n
      The TV Show with Lowest Year  on 'Hulu' is '{hulu_years_low_tvshows['Title'][0]}' : '{hulu_years_low_tvshows['Year'].min()}'\n
      
      The TV Show with Highest Year  on 'Prime Video' is '{prime_video_years_high_tvshows['Title'][0]}' : '{prime_video_years_high_tvshows['Year'].max()}'\n
      The TV Show with Lowest Year  on 'Prime Video' is '{prime_video_years_low_tvshows['Title'][0]}' : '{prime_video_years_low_tvshows['Year'].min()}'\n
      
      The TV Show with Highest Year  on 'Disney+' is '{disney_years_high_tvshows['Title'][0]}' : '{disney_years_high_tvshows['Year'].max()}'\n
      The TV Show with Lowest Year  on 'Disney+' is '{disney_years_low_tvshows['Title'][0]}' : '{disney_years_low_tvshows['Year'].min()}'\n 
      ''')
      The TV Show with Highest Year  Ever Got is 'Gentefied' : '2020'

      The TV Show with Lowest Year  Ever Got is 'Gods & Monsters with Tony Robinson' : '1901'

      
      The TV Show with Highest Year  on 'Netflix' is 'Gentefied' : '2020'

      The TV Show with Lowest Year  on 'Netflix' is 'Born To Explore' : '1914'

      
      The TV Show with Highest Year  on 'Hulu' is 'BOFURI: I Don’t Want to Get Hurt, so I’ll Max Out My Defense.' : '2020'

      The TV Show with Lowest Year  on 'Hulu' is 'You Bet Your Life' : '1947'

      
      The TV Show with Highest Year  on 'Prime Video' is 'Tales from the Loop' : '2020'

      The TV Show with Lowest Year  on 'Prime Video' is 'Gods & Monsters with Tony Robinson' : '1901'

      
      The TV Show with Highest Year  on 'Disney+' is 'It's A Dog's Life' : '2020'

      The TV Show with Lowest Year  on 'Disney+' is 'The Plausible Impossible' : '1956'
 
      
In [41]:
# Mean of the Year column, rounded to 2 decimals, for all shows and for each platform's subset
print(f'''
      Accross All Platforms the Average Year  is '{round(df_tvshows_years['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Netflix' is '{round(netflix_years_tvshows['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Hulu' is '{round(hulu_years_tvshows['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Prime Video' is '{round(prime_video_years_tvshows['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Disney+' is '{round(disney_years_tvshows['Year'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Year  is '2010.67'

      The Average Year  on 'Netflix' is '2014.29'

      The Average Year  on 'Hulu' is '2009.47'

      The Average Year  on 'Prime Video' is '2007.84'

      The Average Year  on 'Disney+' is '2009.24'
 
      
In [42]:
# Release-year distribution: density histogram with KDE curve (left) and box plot (right)
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
# histplot replaces the deprecated sns.distplot; stat = 'density' keeps the normalised
# y axis that distplot drew when a KDE curve was overlaid
sns.histplot(df_tvshows_years['Year'], bins = 20, kde = True, stat = 'density', ax = ax[0])
# Pass the series as x= explicitly instead of relying on positional data arguments
sns.boxplot(x = df_tvshows_years['Year'], ax = ax[1])
plt.show()
In [43]:
# Overlaid release-year histograms (with KDE curves) for the four platforms
plt.figure(figsize = (20, 5))
plt.title('Year s Per Platform')

# Drawn in the same order as the legend entries below.
# NOTE(review): only the first 100 rows of each platform frame are plotted - confirm this is intended.
for _platform_years, _hist_color in (
    (prime_video_years_tvshows, 'lightblue'),
    (netflix_years_tvshows, 'red'),
    (hulu_years_tvshows, 'lightgreen'),
    (disney_years_tvshows, 'darkblue'),
):
    sns.histplot(_platform_years['Year'][:100], color = _hist_color, legend = True, kde = True)

# Legend labels, listed in drawing order
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [44]:
# Per release year: number of titles overall, and number of titles flagged on each platform
year_count = df_tvshows_years.groupby('Year')['Title'].count()
year_tvshows = df_tvshows_years.groupby('Year')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

# Join both aggregates on Year, rename the title count, and order the busiest years first
year_data_tvshows = (
    pd.concat([year_count, year_tvshows], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'TV Shows Count'})
    .sort_values(by = 'TV Shows Count', ascending = False)
)
In [45]:
# TV Shows Count per Year - All Platforms Combined
# (first rows of the frame, which is already sorted by 'TV Shows Count' in descending order)
year_data_tvshows.head()
Out[45]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
85 2017 605 214 116 276 19
86 2018 554 271 129 162 14
84 2016 543 212 115 226 10
83 2015 429 171 115 165 9
87 2019 382 231 94 44 21
In [46]:
# Number of TV shows per release year across all platforms.
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = year_data_tvshows['TV Shows Count'],
             x = year_data_tvshows['Year'], 
             color = year_data_tvshows['Year'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows Count', 'x' : 'Release Year'},
             title  = 'TV Shows with Year : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [47]:
# Share of titles among the 10 release years with the most TV shows
# (year_data_tvshows is sorted by 'TV Shows Count', descending).
# Each slice is a single year, so the title refers to 'Year' rather than 'Year Group'.
fig = px.pie(year_data_tvshows[:10],
             names = year_data_tvshows['Year'][:10],
             values = year_data_tvshows['TV Shows Count'][:10],
             color = year_data_tvshows['TV Shows Count'][:10],
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'TV Shows Count based on Year')
fig.show()
In [48]:
# Highest TV Shows Count per Year - All Platforms Combined
# Yearly counts ordered from the year with the most TV shows to the year with the fewest, re-indexed from 0
df_year_high_tvshows = (
    year_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = False)
    .reset_index(drop = True)
)

print('\nYear with Highest Ever TV Shows Count are : All Platforms Combined\n')
df_year_high_tvshows.head(5)
Year with Highest Ever TV Shows Count are : All Platforms Combined

Out[48]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 2017 605 214 116 276 19
1 2018 554 271 129 162 14
2 2016 543 212 115 226 10
3 2015 429 171 115 165 9
4 2019 382 231 94 44 21
In [49]:
# The 10 release years with the most TV shows (all platforms).
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = df_year_high_tvshows['TV Shows Count'][:10],
             x = df_year_high_tvshows['Year'][:10], 
             color = df_year_high_tvshows['TV Shows Count'][:10],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows Count', 'x' : 'Release Year'},
             title  = 'Year with Highest TV Shows Count : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [50]:
# Yearly counts ordered from the year with the fewest TV shows to the year with the most, re-indexed from 0
df_year_low_tvshows = (
    year_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = True)
    .reset_index(drop = True)
)

print('\nYear with Lowest Ever TV Shows Count are : All Platforms Combined\n')
df_year_low_tvshows.head(5)
Year with Lowest Ever TV Shows Count are : All Platforms Combined

Out[50]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 1901 1 0 0 1 0
1 1948 1 0 0 1 0
2 1949 1 0 0 1 0
3 1945 1 0 0 1 0
4 1944 1 0 0 1 0
In [51]:
# The 10 release years with the fewest TV shows (all platforms).
# The x axis is the release year, not a duration, so it is labelled accordingly.
fig = px.bar(y = df_year_low_tvshows['TV Shows Count'][:10],
             x = df_year_low_tvshows['Year'][:10], 
             color = df_year_low_tvshows['TV Shows Count'][:10],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows Count', 'x' : 'Release Year'},
             title  = 'Year with Lowest TV Shows Count : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [52]:
# Text summary: total titles, number of distinct years, the five years with the most shows,
# and the years holding the highest / lowest 'TV Shows Count'
print(f'''
      Total '{df_tvshows_years['Year'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see TV Shows from Total '{year_data_tvshows['Year'].unique().shape[0]}' Year, They were Like this, \n
 
      {year_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Year'].head(5).unique()} etc. \n
 
      The Year with Highest TV Shows Count have '{year_data_tvshows['TV Shows Count'].max()}' TV Shows Available is '{df_year_high_tvshows['Year'][0]}', &\n
      The Year with Lowest TV Shows Count have '{year_data_tvshows['TV Shows Count'].min()}' TV Shows Available is '{df_year_low_tvshows['Year'][0]}'
      ''')
      Total '5432' Titles are available on All Platforms, out of which

      You Can Choose to see TV Shows from Total '89' Year, They were Like this, 

 
      [2017 2018 2016 2015 2019] etc. 

 
      The Year with Highest TV Shows Count have '605' TV Shows Available is '2017', &

      The Year with Lowest TV Shows Count have '1' TV Shows Available is '1901'
      
In [53]:
# TV Shows Count per Year - Netflix
# Years with at least one Netflix show, busiest first, reduced to the Year and Netflix columns
netflix_year_tvshows = (
    year_data_tvshows[year_data_tvshows['Netflix'] != 0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Every year (including years with zero Netflix shows) ranked by Netflix count: descending, then ascending
netflix_year_high_tvshows = df_year_high_tvshows.sort_values(by = 'Netflix', ascending = False).reset_index(drop = True)
netflix_year_low_tvshows = df_year_high_tvshows.sort_values(by = 'Netflix', ascending = True).reset_index(drop = True)

netflix_year_high_tvshows.head(5)
Out[53]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 2018 554 271 129 162 14
1 2019 382 231 94 44 21
2 2017 605 214 116 276 19
3 2016 543 212 115 226 10
4 2015 429 171 115 165 9
In [54]:
# TV Shows Count per Year - Hulu
# Years with at least one Hulu show, busiest first, reduced to the Year and Hulu columns
hulu_year_tvshows = (
    year_data_tvshows[year_data_tvshows['Hulu'] != 0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Every year (including years with zero Hulu shows) ranked by Hulu count: descending, then ascending
hulu_year_high_tvshows = df_year_high_tvshows.sort_values(by = 'Hulu', ascending = False).reset_index(drop = True)
hulu_year_low_tvshows = df_year_high_tvshows.sort_values(by = 'Hulu', ascending = True).reset_index(drop = True)

hulu_year_high_tvshows.head(5)
Out[54]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 2018 554 271 129 162 14
1 2017 605 214 116 276 19
2 2016 543 212 115 226 10
3 2015 429 171 115 165 9
4 2014 368 137 114 134 10
In [55]:
# TV Shows Count per Year - Prime Video
# Years with at least one Prime Video show, busiest first, reduced to the Year and Prime Video columns
prime_video_year_tvshows = (
    year_data_tvshows[year_data_tvshows['Prime Video'] != 0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'TV Shows Count'])
)

# Every year (including years with zero Prime Video shows) ranked by Prime Video count: descending, then ascending
prime_video_year_high_tvshows = df_year_high_tvshows.sort_values(by = 'Prime Video', ascending = False).reset_index(drop = True)
prime_video_year_low_tvshows = df_year_high_tvshows.sort_values(by = 'Prime Video', ascending = True).reset_index(drop = True)

prime_video_year_high_tvshows.head(5)
Out[55]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 2017 605 214 116 276 19
1 2016 543 212 115 226 10
2 2015 429 171 115 165 9
3 2018 554 271 129 162 14
4 2014 368 137 114 134 10
In [56]:
# TV Shows Count per Year - Disney+
# Years with at least one Disney+ show, busiest first, reduced to the Year and Disney+ columns
disney_year_tvshows = (
    year_data_tvshows[year_data_tvshows['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'])
)

# Every year (including years with zero Disney+ shows) ranked by Disney+ count: descending, then ascending
disney_year_high_tvshows = df_year_high_tvshows.sort_values(by = 'Disney+', ascending = False).reset_index(drop = True)
disney_year_low_tvshows = df_year_high_tvshows.sort_values(by = 'Disney+', ascending = True).reset_index(drop = True)

disney_year_high_tvshows.head(5)
Out[56]:
Year TV Shows Count Netflix Hulu Prime Video Disney+
0 2019 382 231 94 44 21
1 2017 605 214 116 276 19
2 2018 554 271 129 162 14
3 2012 283 79 96 117 11
4 2016 543 212 115 226 10
In [57]:
# For all platforms combined and for each platform: the Year in row 0 of the
# count-sorted frame, paired with that frame's max / min show count.
# The per-platform "low" frames include years with zero shows on that platform.
print(f'''
      The Year with Highest TV Shows Count Ever Got is '{df_year_high_tvshows['Year'][0]}' : '{df_year_high_tvshows['TV Shows Count'].max()}'\n
      The Year with Lowest TV Shows Count Ever Got is '{df_year_low_tvshows['Year'][0]}' : '{df_year_low_tvshows['TV Shows Count'].min()}'\n
      
      The Year with Highest TV Shows Count on 'Netflix' is '{netflix_year_high_tvshows['Year'][0]}' : '{netflix_year_high_tvshows['Netflix'].max()}'\n
      The Year with Lowest TV Shows Count on 'Netflix' is '{netflix_year_low_tvshows['Year'][0]}' : '{netflix_year_low_tvshows['Netflix'].min()}'\n
      
      The Year with Highest TV Shows Count on 'Hulu' is '{hulu_year_high_tvshows['Year'][0]}' : '{hulu_year_high_tvshows['Hulu'].max()}'\n
      The Year with Lowest TV Shows Count on 'Hulu' is '{hulu_year_low_tvshows['Year'][0]}' : '{hulu_year_low_tvshows['Hulu'].min()}'\n
      
      The Year with Highest TV Shows Count on 'Prime Video' is '{prime_video_year_high_tvshows['Year'][0]}' : '{prime_video_year_high_tvshows['Prime Video'].max()}'\n
      The Year with Lowest TV Shows Count on 'Prime Video' is '{prime_video_year_low_tvshows['Year'][0]}' : '{prime_video_year_low_tvshows['Prime Video'].min()}'\n
      
      The Year with Highest TV Shows Count on 'Disney+' is '{disney_year_high_tvshows['Year'][0]}' : '{disney_year_high_tvshows['Disney+'].max()}'\n
      The Year with Lowest TV Shows Count on 'Disney+' is '{disney_year_low_tvshows['Year'][0]}' : '{disney_year_low_tvshows['Disney+'].min()}'\n 
      ''')
      The Year with Highest TV Shows Count Ever Got is '2017' : '605'

      The Year with Lowest TV Shows Count Ever Got is '1901' : '1'

      
      The Year with Highest TV Shows Count on 'Netflix' is '2018' : '271'

      The Year with Lowest TV Shows Count on 'Netflix' is '1970' : '0'

      
      The Year with Highest TV Shows Count on 'Hulu' is '2018' : '129'

      The Year with Lowest TV Shows Count on 'Hulu' is '1901' : '0'

      
      The Year with Highest TV Shows Count on 'Prime Video' is '2017' : '276'

      The Year with Lowest TV Shows Count on 'Prime Video' is '1914' : '0'

      
      The Year with Highest TV Shows Count on 'Disney+' is '2019' : '21'

      The Year with Lowest TV Shows Count on 'Disney+' is '1970' : '0'
 
      
In [58]:
# Mean number of shows per year, rounded to 2 decimals. The all-platform mean covers every year;
# each per-platform mean covers only the years in which that platform has at least one show.
print(f'''
      Accross All Platforms the Average TV Shows Count of Year is '{round(year_data_tvshows['TV Shows Count'].mean(), ndigits = 2)}'\n
      The Average TV Shows Count of Year on 'Netflix' is '{round(netflix_year_tvshows['Netflix'].mean(), ndigits = 2)}'\n
      The Average TV Shows Count of Year on 'Hulu' is '{round(hulu_year_tvshows['Hulu'].mean(), ndigits = 2)}'\n
      The Average TV Shows Count of Year on 'Prime Video' is '{round(prime_video_year_tvshows['Prime Video'].mean(), ndigits = 2)}'\n
      The Average TV Shows Count of Year on 'Disney+' is '{round(disney_year_tvshows['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average TV Shows Count of Year is '61.03'

      The Average TV Shows Count of Year on 'Netflix' is '37.84'

      The Average TV Shows Count of Year on 'Hulu' is '23.49'

      The Average TV Shows Count of Year on 'Prime Video' is '24.9'

      The Average TV Shows Count of Year on 'Disney+' is '4.46'
 
      
In [59]:
# Number of distinct release years overall, and per platform
# (each per-platform frame holds only years with at least one show on that platform)
print(f'''
      Accross All Platforms Total Count of Year is '{year_data_tvshows['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Netflix' is '{netflix_year_tvshows['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Hulu' is '{hulu_year_tvshows['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Prime Video' is '{prime_video_year_tvshows['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Disney+' is '{disney_year_tvshows['Year'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Year is '89'

      Total Count of Year on 'Netflix' is '49'

      Total Count of Year on 'Hulu' is '68'

      Total Count of Year on 'Prime Video' is '88'

      Total Count of Year on 'Disney+' is '41'
 
      
In [60]:
# Line chart of 'TV Shows Count' against 'Year' for all platforms combined
fig = plt.figure(figsize = (20, 10))
sns.lineplot(data = year_data_tvshows, x = 'Year', y = 'TV Shows Count')
plt.show()
In [61]:
# Yearly show counts of the four platforms on one shared axes.
# Each line carries a label so the legend identifies which colour belongs to which platform.
plt.figure(figsize = (20, 10))
sns.lineplot(x = year_data_tvshows['Year'], y = year_data_tvshows['Netflix'], color = 'red', label = 'Netflix')
sns.lineplot(x = year_data_tvshows['Year'], y = year_data_tvshows['Hulu'], color = 'lightgreen', label = 'Hulu')
sns.lineplot(x = year_data_tvshows['Year'], y = year_data_tvshows['Prime Video'], color = 'lightblue', label = 'Prime Video')
sns.lineplot(x = year_data_tvshows['Year'], y = year_data_tvshows['Disney+'], color = 'darkblue', label = 'Disney+')
plt.xlabel('Release Year', fontsize = 15)
plt.ylabel('TV Shows Count', fontsize = 15)
plt.legend()
plt.show()
In [62]:
# 2x2 grid with one yearly show-count line per platform; each panel is titled with its platform
fig, axes = plt.subplots(2, 2,figsize=(20 ,20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
_line_colors = ['red', 'lightgreen', 'lightblue', 'darkblue']

# axes.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1]
n_y_ax1, h_y_ax2, p_y_ax3, d_y_ax4 = [
    sns.lineplot(x = year_data_tvshows['Year'], y = year_data_tvshows[_name], color = _shade, ax = _panel)
    for _name, _shade, _panel in zip(labels, _line_colors, axes.flat)
]

for _panel_ax, _panel_title in zip((n_y_ax1, h_y_ax2, p_y_ax3, d_y_ax4), labels):
    _panel_ax.title.set_text(_panel_title)

plt.show()
In [63]:
def round_val(data):
    """Round ``data`` with the built-in ``round``; return None for NaN.

    A value counts as missing when its string form is exactly ``'nan'``.
    """
    if str(data) == 'nan':
        return None
    return round(data)
        
def round_fix(data):
    """Return the year-group label for a release year.

    Years 1801-1900 share the label 1900. From 1901 to 2030 every year is
    labelled with the last year of its ten-year span (1901-1910 -> 1910, ...,
    2021-2030 -> 2030). Anything that matches no span - earlier or later years,
    non-whole numbers, NaN - gets the catch-all label 2100.
    """
    # Inclusive (first year, last year) of every group; the label is the last year.
    spans = [(1801, 1900)] + [(decade_end - 9, decade_end) for decade_end in range(1910, 2031, 10)]
    for first_year, last_year in spans:
        if data in range(first_year, last_year + 1):
            return last_year
    return 2100
In [64]:
# Label every row with the year group that round_fix assigns to its 'Year',
# stored as an integer column.
df_tvshows_years_group['Year Group'] = df_tvshows_years_group['Year'].apply(round_fix).astype(int)

# Rows per year group, ordered from the largest group label to the smallest.
_year_group_counts = df_tvshows_years_group['Year Group'].value_counts().sort_index(ascending=False)
years_values = _year_group_counts.tolist()
years_index = _year_group_counts.index

# To inspect: years_values, years_index
In [65]:
# Per year group: the non-null 'Title' count and the sum of each platform column.
years_group_count = df_tvshows_years_group.groupby('Year Group')['Title'].count()
years_group_tvshows = df_tvshows_years_group.groupby('Year Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

# Put both side by side on the shared 'Year Group' index, turn that index into a
# column, rename the title count, and order the rows by that count (largest first).
years_group_data_tvshows = (
    pd.concat([years_group_count, years_group_tvshows], axis=1)
    .reset_index()
    .rename(columns={'Title': 'TV Shows Count'})
    .sort_values(by='TV Shows Count', ascending=False)
)
In [66]:
# Year groups with their TV show counts (all platforms combined), largest count first.
years_group_data_tvshows.sort_values('TV Shows Count', ascending=False)
Out[66]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
11 2020 3864 1577 1006 1348 114
10 2010 985 213 384 487 33
9 2000 271 34 103 155 20
8 1990 126 19 48 70 11
7 1980 60 4 19 38 2
6 1970 56 4 24 34 2
5 1960 42 2 10 35 1
4 1950 16 0 3 13 0
3 1940 7 0 0 7 0
0 1910 2 0 0 2 0
2 1930 2 0 0 2 0
1 1920 1 1 0 0 0
In [67]:
# Same table ordered by 'Year Group', largest label first.
years_group_data_tvshows.sort_values('Year Group', ascending=False)
Out[67]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
11 2020 3864 1577 1006 1348 114
10 2010 985 213 384 487 33
9 2000 271 34 103 155 20
8 1990 126 19 48 70 11
7 1980 60 4 19 38 2
6 1970 56 4 24 34 2
5 1960 42 2 10 35 1
4 1950 16 0 3 13 0
3 1940 7 0 0 7 0
2 1930 2 0 0 2 0
1 1920 1 1 0 0 0
0 1910 2 0 0 2 0
In [68]:
# Bar chart of 'TV Shows Count' per 'Year Group' (all platforms combined).
# The axis label and title previously said "In Minutes" - left over from a
# runtime chart - although the x axis holds year groups.
fig = px.bar(y = years_group_data_tvshows['TV Shows Count'],
             x = years_group_data_tvshows['Year Group'],
             color = years_group_data_tvshows['Year Group'],
             color_continuous_scale = 'Teal_r',
             # 'color' names the colour bar, which is driven by the same column as x.
             labels = { 'y' : 'TV Shows Count', 'x' : 'Year Group', 'color' : 'Year Group'},
             title  = 'TV Shows Count by Year Group : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [69]:
# Pie chart of the first 10 rows of years_group_data_tvshows.
# The columns are referenced by name so plotly reads them from the sliced frame;
# passing the full-length Series (as before) made the [:10] slice a no-op and
# every year group ended up in the pie.
# NOTE(review): "first 10" means "10 largest" only while years_group_data_tvshows
# is still ordered by 'TV Shows Count' descending - confirm no cell re-sorts it.
fig = px.pie(years_group_data_tvshows[:10],
             names = 'Year Group',
             values = 'TV Shows Count',
             color = 'TV Shows Count',
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'TV Shows Count based on Year Group')
fig.show()
In [70]:
# Year groups ranked from the highest to the lowest 'TV Shows Count', with the
# index renumbered from 0 so that row 0 is the top-ranked group.
df_years_group_high_tvshows = (
    years_group_data_tvshows
    .sort_values(by='TV Shows Count', ascending=False)
    .reset_index(drop=True)
)

df_years_group_high_tvshows.head(5)
Out[70]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 2020 3864 1577 1006 1348 114
1 2010 985 213 384 487 33
2 2000 271 34 103 155 20
3 1990 126 19 48 70 11
4 1980 60 4 19 38 2
In [71]:
# Year groups ranked from the lowest to the highest 'TV Shows Count', with the
# index renumbered from 0 so that row 0 is the bottom-ranked group.
df_years_group_low_tvshows = (
    years_group_data_tvshows
    .sort_values(by='TV Shows Count', ascending=True)
    .reset_index(drop=True)
)

df_years_group_low_tvshows.head(5)
Out[71]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1920 1 1 0 0 0
1 1910 2 0 0 2 0
2 1930 2 0 0 2 0
3 1940 7 0 0 7 0
4 1950 16 0 3 13 0
In [72]:
# Text summary of the year-group breakdown across all platforms:
#   - non-null 'Year' count of df_tvshows_years,
#   - number of distinct 'Year Group' values and the values themselves, listed in
#     descending 'TV Shows Count' order,
#   - the max / min 'TV Shows Count' together with the 'Year Group' stored under
#     index label 0 of df_years_group_high_tvshows / df_years_group_low_tvshows.
# NOTE(review): the [0] lookups assume both frames carry a fresh 0-based index so
# that label 0 is their first (top / bottom ranked) row - confirm against the
# cells that build them.
print(f'''
      Total '{df_tvshows_years['Year'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see TV Shows from Total '{years_group_data_tvshows['Year Group'].unique().shape[0]}' Year Group, They were Like this, \n
 
      {years_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Year Group'].unique()} etc. \n
 
      The Year Group with Highest TV Shows Count have '{years_group_data_tvshows['TV Shows Count'].max()}' TV Shows Available is '{df_years_group_high_tvshows['Year Group'][0]}', &\n
      The Year Group with Lowest TV Shows Count have '{years_group_data_tvshows['TV Shows Count'].min()}' TV Shows Available is '{df_years_group_low_tvshows['Year Group'][0]}'
      ''')
      Total '5432' Titles are available on All Platforms, out of which

      You Can Choose to see TV Shows from Total '12' Year Group, They were Like this, 

 
      [2020 2010 2000 1990 1980 1970 1960 1950 1940 1910 1930 1920] etc. 

 
      The Year Group with Highest TV Shows Count have '3864' TV Shows Available is '2020', &

      The Year Group with Lowest TV Shows Count have '1' TV Shows Available is '1920'
      
In [73]:
# Year groups with a non-zero 'Netflix' value, largest first, reduced to the
# 'Year Group' and 'Netflix' columns.
netflix_years_group_tvshows = (
    years_group_data_tvshows[years_group_data_tvshows['Netflix'] != 0]
    .sort_values(by='Netflix', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Full year-group table re-ranked by the 'Netflix' column, highest first.
netflix_years_group_high_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Netflix', ascending=False)
    .reset_index(drop=True)
)

# Same table ranked lowest first.
# NOTE(review): rows where 'Netflix' is 0 are not filtered out here, so row 0 is
# one of possibly several zero-count year groups.
netflix_years_group_low_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Netflix', ascending=True)
    .reset_index(drop=True)
)

netflix_years_group_high_tvshows.head(5)
Out[73]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 2020 3864 1577 1006 1348 114
1 2010 985 213 384 487 33
2 2000 271 34 103 155 20
3 1990 126 19 48 70 11
4 1980 60 4 19 38 2
In [74]:
# Year groups with a non-zero 'Hulu' value, largest first, reduced to the
# 'Year Group' and 'Hulu' columns.
hulu_years_group_tvshows = (
    years_group_data_tvshows[years_group_data_tvshows['Hulu'] != 0]
    .sort_values(by='Hulu', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Full year-group table re-ranked by the 'Hulu' column, highest first.
hulu_years_group_high_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Hulu', ascending=False)
    .reset_index(drop=True)
)

# Same table ranked lowest first.
# NOTE(review): rows where 'Hulu' is 0 are not filtered out here, so row 0 is
# one of possibly several zero-count year groups.
hulu_years_group_low_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Hulu', ascending=True)
    .reset_index(drop=True)
)

hulu_years_group_high_tvshows.head(5)
Out[74]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 2020 3864 1577 1006 1348 114
1 2010 985 213 384 487 33
2 2000 271 34 103 155 20
3 1990 126 19 48 70 11
4 1970 56 4 24 34 2
In [75]:
# Year groups with a non-zero 'Prime Video' value, largest first, reduced to the
# 'Year Group' and 'Prime Video' columns.
prime_video_years_group_tvshows = (
    years_group_data_tvshows[years_group_data_tvshows['Prime Video'] != 0]
    .sort_values(by='Prime Video', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Hulu', 'Disney+', 'TV Shows Count'])
)

# Full year-group table re-ranked by the 'Prime Video' column, highest first.
prime_video_years_group_high_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Prime Video', ascending=False)
    .reset_index(drop=True)
)

# Same table ranked lowest first.
# NOTE(review): rows where 'Prime Video' is 0 are not filtered out here, so row 0
# may be a zero-count year group.
prime_video_years_group_low_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Prime Video', ascending=True)
    .reset_index(drop=True)
)

prime_video_years_group_high_tvshows.head(5)
Out[75]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 2020 3864 1577 1006 1348 114
1 2010 985 213 384 487 33
2 2000 271 34 103 155 20
3 1990 126 19 48 70 11
4 1980 60 4 19 38 2
In [76]:
# Year groups with a non-zero 'Disney+' value, largest first, reduced to the
# 'Year Group' and 'Disney+' columns.
disney_years_group_tvshows = (
    years_group_data_tvshows[years_group_data_tvshows['Disney+'] != 0]
    .sort_values(by='Disney+', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'])
)

# Full year-group table re-ranked by the 'Disney+' column, highest first.
disney_years_group_high_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Disney+', ascending=False)
    .reset_index(drop=True)
)

# Same table ranked lowest first.
# NOTE(review): rows where 'Disney+' is 0 are not filtered out here, so row 0 is
# one of possibly several zero-count year groups.
disney_years_group_low_tvshows = (
    df_years_group_high_tvshows
    .sort_values(by='Disney+', ascending=True)
    .reset_index(drop=True)
)

disney_years_group_high_tvshows.head(5)
Out[76]:
Year Group TV Shows Count Netflix Hulu Prime Video Disney+
0 2020 3864 1577 1006 1348 114
1 2010 985 213 384 487 33
2 2000 271 34 103 155 20
3 1990 126 19 48 70 11
4 1980 60 4 19 38 2
In [77]:
# Highest / lowest year group, first for all platforms combined and then for
# each platform. Every line pairs
#   - the 'Year Group' stored under index label 0 of the matching *_high_* or
#     *_low_* frame, with
#   - the .max() / .min() of that frame's count column.
# NOTE(review): the [0] lookups assume each frame was re-indexed from 0 after
# sorting, so label 0 is its first row; the count and the year group are read
# independently, so they only belong together under that assumption - confirm.
print(f'''
      The Year Group with Highest TV Shows Count Ever Got is '{df_years_group_high_tvshows['Year Group'][0]}' : '{df_years_group_high_tvshows['TV Shows Count'].max()}'\n
      The Year Group with Lowest TV Shows Count Ever Got is '{df_years_group_low_tvshows['Year Group'][0]}' : '{df_years_group_low_tvshows['TV Shows Count'].min()}'\n
      
      The Year Group with Highest TV Shows Count on 'Netflix' is '{netflix_years_group_high_tvshows['Year Group'][0]}' : '{netflix_years_group_high_tvshows['Netflix'].max()}'\n
      The Year Group with Lowest TV Shows Count on 'Netflix' is '{netflix_years_group_low_tvshows['Year Group'][0]}' : '{netflix_years_group_low_tvshows['Netflix'].min()}'\n
      
      The Year Group with Highest TV Shows Count on 'Hulu' is '{hulu_years_group_high_tvshows['Year Group'][0]}' : '{hulu_years_group_high_tvshows['Hulu'].max()}'\n
      The Year Group with Lowest TV Shows Count on 'Hulu' is '{hulu_years_group_low_tvshows['Year Group'][0]}' : '{hulu_years_group_low_tvshows['Hulu'].min()}'\n
      
      The Year Group with Highest TV Shows Count on 'Prime Video' is '{prime_video_years_group_high_tvshows['Year Group'][0]}' : '{prime_video_years_group_high_tvshows['Prime Video'].max()}'\n
      The Year Group with Lowest TV Shows Count on 'Prime Video' is '{prime_video_years_group_low_tvshows['Year Group'][0]}' : '{prime_video_years_group_low_tvshows['Prime Video'].min()}'\n
      
      The Year Group with Highest TV Shows Count on 'Disney+' is '{disney_years_group_high_tvshows['Year Group'][0]}' : '{disney_years_group_high_tvshows['Disney+'].max()}'\n
      The Year Group with Lowest TV Shows Count on 'Disney+' is '{disney_years_group_low_tvshows['Year Group'][0]}' : '{disney_years_group_low_tvshows['Disney+'].min()}'\n 
      ''')
      The Year Group with Highest TV Shows Count Ever Got is '2020' : '3864'

      The Year Group with Lowest TV Shows Count Ever Got is '1920' : '1'

      
      The Year Group with Highest TV Shows Count on 'Netflix' is '2020' : '1577'

      The Year Group with Lowest TV Shows Count on 'Netflix' is '1950' : '0'

      
      The Year Group with Highest TV Shows Count on 'Hulu' is '2020' : '1006'

      The Year Group with Lowest TV Shows Count on 'Hulu' is '1940' : '0'

      
      The Year Group with Highest TV Shows Count on 'Prime Video' is '2020' : '1348'

      The Year Group with Lowest TV Shows Count on 'Prime Video' is '1920' : '0'

      
      The Year Group with Highest TV Shows Count on 'Disney+' is '2020' : '114'

      The Year Group with Lowest TV Shows Count on 'Disney+' is '1950' : '0'
 
      
In [78]:
# One bar chart per platform, each drawn from that platform's own table
# (*_years_group_tvshows) with 'Year Group' on x and the platform column on y.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

n_ru_ax1 = sns.barplot(data=netflix_years_group_tvshows, x='Year Group', y='Netflix', palette='Reds_r', ax=axes[0, 0])
h_ru_ax2 = sns.barplot(data=hulu_years_group_tvshows, x='Year Group', y='Hulu', palette='Greens_r', ax=axes[0, 1])
p_ru_ax3 = sns.barplot(data=prime_video_years_group_tvshows, x='Year Group', y='Prime Video', palette='Blues_r', ax=axes[1, 0])
d_ru_ax4 = sns.barplot(data=disney_years_group_tvshows, x='Year Group', y='Disney+', palette='BuPu_r', ax=axes[1, 1])

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# Title each panel with the platform it shows (same order as `labels`).
for panel, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    panel.title.set_text(panel_title)

plt.show()
In [79]:
# Overlay the four platform columns of years_group_data_tvshows against
# 'Year Group' on one set of axes.
# Every line gets a label so the legend tells the reader which colour belongs to
# which platform; without it the chart cannot be interpreted on its own.
plt.figure(figsize = (20, 5))
sns.lineplot(x = years_group_data_tvshows['Year Group'], y = years_group_data_tvshows['Netflix'], color = 'red', label = 'Netflix')
sns.lineplot(x = years_group_data_tvshows['Year Group'], y = years_group_data_tvshows['Hulu'], color = 'lightgreen', label = 'Hulu')
sns.lineplot(x = years_group_data_tvshows['Year Group'], y = years_group_data_tvshows['Prime Video'], color = 'lightblue', label = 'Prime Video')
sns.lineplot(x = years_group_data_tvshows['Year Group'], y = years_group_data_tvshows['Disney+'], color = 'darkblue', label = 'Disney+')
plt.xlabel('Year Group', fontsize = 15)
# The y label is set explicitly because each lineplot call would otherwise leave
# the name of the last plotted column on the shared axis.
plt.ylabel('TV Shows Count', fontsize = 15)
plt.legend(title = 'Platform', fontsize = 12)
plt.show()
In [80]:
# Number of distinct 'Year Group' values in the combined table and in each
# per-platform table (*_years_group_tvshows).
print(f'''
      Accross All Platforms Total Count of Year Group is '{years_group_data_tvshows['Year Group'].unique().shape[0]}'\n
      Total Count of Year Group on 'Netflix' is '{netflix_years_group_tvshows['Year Group'].unique().shape[0]}'\n
      Total Count of Year Group on 'Hulu' is '{hulu_years_group_tvshows['Year Group'].unique().shape[0]}'\n
      Total Count of Year Group on 'Prime Video' is '{prime_video_years_group_tvshows['Year Group'].unique().shape[0]}'\n
      Total Count of Year Group on 'Disney+' is '{disney_years_group_tvshows['Year Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Year Group is '12'

      Total Count of Year Group on 'Netflix' is '8'

      Total Count of Year Group on 'Hulu' is '8'

      Total Count of Year Group on 'Prime Video' is '11'

      Total Count of Year Group on 'Disney+' is '7'
 
      
In [81]:
# Transposed per-platform trend: platform count on x, 'Year Group' on y.
#
# By default lineplot sorts the rows by x and collapses rows that share an x
# value into their mean. With counts on x, every set of year groups that have
# the same count (for example several groups with 0 titles) was averaged into a
# single, meaningless "year group". To draw one point per year group, the rows
# are fed in 'Year Group' order with sorting and aggregation switched off.
years_group_chronological = years_group_data_tvshows.sort_values(by = 'Year Group')

fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

n_ru_ax1 = sns.lineplot(y = years_group_chronological['Year Group'], x = years_group_chronological['Netflix'], color = 'red', sort = False, estimator = None, ax = axes[0, 0])
h_ru_ax2 = sns.lineplot(y = years_group_chronological['Year Group'], x = years_group_chronological['Hulu'], color = 'lightgreen', sort = False, estimator = None, ax = axes[0, 1])
p_ru_ax3 = sns.lineplot(y = years_group_chronological['Year Group'], x = years_group_chronological['Prime Video'], color = 'lightblue', sort = False, estimator = None, ax = axes[1, 0])
d_ru_ax4 = sns.lineplot(y = years_group_chronological['Year Group'], x = years_group_chronological['Disney+'], color = 'darkblue', sort = False, estimator = None, ax = axes[1, 1])

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])

plt.show()
In [82]:
# One panel per platform: the platform column of years_group_data_tvshows
# against 'Year Group'.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

n_yg_ax1 = sns.lineplot(data=years_group_data_tvshows, x='Year Group', y='Netflix', color='red', ax=axes[0, 0])
h_yg_ax2 = sns.lineplot(data=years_group_data_tvshows, x='Year Group', y='Hulu', color='lightgreen', ax=axes[0, 1])
p_yg_ax3 = sns.lineplot(data=years_group_data_tvshows, x='Year Group', y='Prime Video', color='lightblue', ax=axes[1, 0])
d_yg_ax4 = sns.lineplot(data=years_group_data_tvshows, x='Year Group', y='Disney+', color='darkblue', ax=axes[1, 1])

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# Title each panel with the platform it shows (same order as `labels`).
for panel, panel_title in zip((n_yg_ax1, h_yg_ax2, p_yg_ax3, d_yg_ax4), labels):
    panel.title.set_text(panel_title)

plt.show()
In [83]:
# One bar chart per platform drawn from the combined table, so every year group
# present in years_group_data_tvshows appears in each panel.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

n_ru_ax1 = sns.barplot(data=years_group_data_tvshows, x='Year Group', y='Netflix', palette='Reds_r', ax=axes[0, 0])
h_ru_ax2 = sns.barplot(data=years_group_data_tvshows, x='Year Group', y='Hulu', palette='Greens_r', ax=axes[0, 1])
p_ru_ax3 = sns.barplot(data=years_group_data_tvshows, x='Year Group', y='Prime Video', palette='Blues_r', ax=axes[1, 0])
d_ru_ax4 = sns.barplot(data=years_group_data_tvshows, x='Year Group', y='Disney+', palette='BuPu_r', ax=axes[1, 1])

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# Title each panel with the platform it shows (same order as `labels`).
for panel, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    panel.title.set_text(panel_title)

plt.show()